{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dc7282dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from mlxtend.frequent_patterns import apriori\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca0805e3",
   "metadata": {},
   "source": [
    "1.以\";” 为分隔符读取数据中的ID和fruit列，将fruit 列通过lambda 函数，以逗号为分割符进行切分并去除字符串中的空格，并生成矩阵df_mtix。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e3ccff78",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>fruit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>apple ,banana</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Orange ,banana,apple</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Orange ,banana,grape</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>grape ,apple</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>pineapple ,Orange</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID                  fruit\n",
       "0   1         apple ,banana \n",
       "1   2   Orange ,banana,apple\n",
       "2   3  Orange ,banana,grape \n",
       "3   4           grape ,apple\n",
       "4   5      pineapple ,Orange"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('./data/fruit.csv',sep=\";\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9cdeb0b3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0              [apple, banana]\n",
       "1      [Orange, banana, apple]\n",
       "2      [Orange, banana, grape]\n",
       "3               [grape, apple]\n",
       "4          [pineapple, Orange]\n",
       "5    [pineapple, banana, pear]\n",
       "6                [pear, apple]\n",
       "7        [pear, banana, grape]\n",
       "8           [grape, pineapple]\n",
       "Name: fruit, dtype: object"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#由考生填写\n",
    "df_mtix=df['fruit'].map(lambda line:line.replace(' ','').split(','))\n",
    "df_mtix\n",
    "#由考生填写"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7a263614",
   "metadata": {},
   "source": [
    "2.使用 TransactionEncoder 函数对矩阵进行拟合df_mtix，生成数据 df_te。将数据 df_te 转换为DataFrame，使用df_te.columns 为列名。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5be71f9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "#由考生填写\n",
    "from mlxtend.preprocessing import TransactionEncoder\n",
    "te = TransactionEncoder()\n",
    "data_te = te.fit_transform(df_mtix)\n",
    "df_te = pd.DataFrame(data=data_te,columns=te.columns_)\n",
    "#由考生填写"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "50fd1bce",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Orange</th>\n",
       "      <th>apple</th>\n",
       "      <th>banana</th>\n",
       "      <th>grape</th>\n",
       "      <th>pear</th>\n",
       "      <th>pineapple</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Orange  apple  banana  grape   pear  pineapple\n",
       "0   False   True    True  False  False      False\n",
       "1    True   True    True  False  False      False\n",
       "2    True  False    True   True  False      False\n",
       "3   False   True   False   True  False      False\n",
       "4    True  False   False  False  False       True\n",
       "5   False  False    True  False   True       True\n",
       "6   False   True   False  False   True      False\n",
       "7   False  False    True   True   True      False\n",
       "8   False  False   False   True  False       True"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_te"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6ce122df",
   "metadata": {},
   "source": [
    "3.使用 fpgrowth 算法建模发现数据中的关联规则,指定min_support=0.5,use_colnames=True 使用mlxtend库中的associatio_rules 函数来找出关联规则。指定metric为'confidence',min_threshold 为0.3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "0ded4fcd",
   "metadata": {},
   "outputs": [],
   "source": [
    "from mlxtend.frequent_patterns import fpgrowth\n",
    "from mlxtend.frequent_patterns import association_rules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6e927c53",
   "metadata": {},
   "outputs": [],
   "source": [
    "#由考生填写\n",
    "model = fpgrowth(df=df_te,min_support=0.5,use_colnames=True)\n",
    "rules = association_rules(df=model,metric='confidence',min_threshold=0.3,support_only=True,) \n",
    "#由考生填写\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a3ca97f0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>antecedents</th>\n",
       "      <th>consequents</th>\n",
       "      <th>antecedent support</th>\n",
       "      <th>consequent support</th>\n",
       "      <th>support</th>\n",
       "      <th>confidence</th>\n",
       "      <th>lift</th>\n",
       "      <th>leverage</th>\n",
       "      <th>conviction</th>\n",
       "      <th>zhangs_metric</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction, zhangs_metric]\n",
       "Index: []"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rules"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1d18042e",
   "metadata": {},
   "source": [
    "4.输出关联规则rules中，前验置信度>0.6,置信度大于0.5的项集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f525939c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>antecedents</th>\n",
       "      <th>consequents</th>\n",
       "      <th>antecedent support</th>\n",
       "      <th>consequent support</th>\n",
       "      <th>support</th>\n",
       "      <th>confidence</th>\n",
       "      <th>lift</th>\n",
       "      <th>leverage</th>\n",
       "      <th>conviction</th>\n",
       "      <th>zhangs_metric</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction, zhangs_metric]\n",
       "Index: []"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# support 支持度\n",
    "# confidence 置信度\n",
    "# lift 提升度\n",
    "#由考生填写\n",
    "rules[(rules['antecedent support']>=0.6) & (rules['confidence']>0.5)]\n",
    "#由考生填写"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7d885ad",
   "metadata": {},
   "source": [
    "5.输出关联规则rules中，antecedents为{苹果，香蕉}的内容"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "89ef9f01",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>antecedents</th>\n",
       "      <th>consequents</th>\n",
       "      <th>antecedent support</th>\n",
       "      <th>consequent support</th>\n",
       "      <th>support</th>\n",
       "      <th>confidence</th>\n",
       "      <th>lift</th>\n",
       "      <th>leverage</th>\n",
       "      <th>conviction</th>\n",
       "      <th>zhangs_metric</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction, zhangs_metric]\n",
       "Index: []"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#由考生填写\n",
    "rules[rules['antecedents']=={'apple','banana'}]\n",
    "#由考生填写"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
